1 Introduction

The objectives of this notebook are:

  • Visualize how well we could remove technical variability associated with the assay (3’, 5’ or multiome). We should see a high degree of intermixing between the 3 assays.
  • Visualize how well we could preserve the biological variability. We will use the annotation of King et al., as a positive control, so that the data integration should preserve the cell type annotations.
  • Plot several markers of doublets: lineage markers, pDNN, scrublet predictions, number of features, etc. The broader objective of level 2 is to eliminate most remaining doublets and poor-quality cells, as we will discuss in future notebooks. These visualizations will allow us to explore this. We will also plot their location in the UMAP obtained at level 1.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(tidyverse)

2.2 Parameters

# Paths
path_to_obj <- str_c(
  here::here("scRNA-seq/results/R_objects/level_2/"),
  cell_type,
  "/",
  cell_type,
  "_integrated_level_2.rds",
  sep = ""
)


# Functions
source(here::here("scRNA-seq/bin/utils.R"))


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "gold", "antiquewhite", "chocolate1", "coral2",
                   "blueviolet", "brown1", "darkmagenta", "deepskyblue1",
                   "dimgray", "deeppink1", "green", "lightgray", "hotpink1",
                   "indianred4", "khaki", "mediumorchid2")

2.3 Load data

# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 37378 features across 12520 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony

3 Assess integration

3.1 Removal of technical variability

p_assay <- plot_split_umap(seurat, var = "assay")
p_assay

3.2 Preservation of biological variability

p_king <- plot_annotation_king(seurat, color_palette = color_palette) +
  guides(colour = guide_legend(override.aes = list(size = 2.5)))
p_king

4 Spot potential doublets

4.1 Lineage markers

canonical_markers <- c("CD79A", "CD79B", "CD3D", "CD3E", "NKG7", "LYZ",
                       "IGHD", "IGHM", "IGHA1", "IGHG1", "FDCSP", "PTCRA",
                       "XBP1", "TOP2A", "KRT19", "SPRR3", "DNTT", "VPREB1")
canonical_markers_gg <- purrr::map(canonical_markers, function(x) {
  p <- feature_plot_doublets(seurat_obj = seurat, feature = x)
  p
})
names(canonical_markers_gg) <- canonical_markers
canonical_markers_gg
## $CD79A

## 
## $CD79B

## 
## $CD3D

## 
## $CD3E

## 
## $NKG7

## 
## $LYZ

## 
## $IGHD

## 
## $IGHM

## 
## $IGHA1

## 
## $IGHG1

## 
## $FDCSP

## 
## $PTCRA

## 
## $XBP1

## 
## $TOP2A

## 
## $KRT19

## 
## $SPRR3

## 
## $DNTT

## 
## $VPREB1

4.2 pDNN and scrublet prediction

# pDNN
pDNN_vars <- c("pDNN_hashing", "pDNN_scrublet", "pDNN_union")
pDNN_gg <- purrr::map(pDNN_vars, function(x) {
  p <- plot_pDNN(seurat_obj = seurat, pDNN_var = x, pt_size = 0.2)
  p
})
pDNN_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

# Scrublet
seurat$scrublet_predicted_doublet[seurat$scrublet_predicted_doublet == "True"] <- "TRUE"
seurat$scrublet_predicted_doublet[seurat$scrublet_predicted_doublet == "False"] <- "FALSE"
DimPlot(seurat, group.by = "scrublet_predicted_doublet")

4.3 QC metrics

qc_vars <- c(
  "nCount_RNA",
  "nFeature_RNA",
  "pct_mt",
  "pct_ribosomal"
)
qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(seurat, features = x, cols = viridis::viridis(10))
  p
})
qc_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

4.4 UMAP level 1

umap_level_1 <- FeatureScatter(
  seurat,
  "UMAP_1_level_1",
  "UMAP_2_level_1",
  group.by = "seurat_clusters"
)
umap_level_1 <- umap_level_1 +
  theme(
    legend.position = "none",
    plot.title = element_blank()
  )
umap_level_1

4.5 Cell cycle scoring

s_gg <- feature_plot_doublets(seurat_obj = seurat, feature = "S.Score")
g2m_gg <- feature_plot_doublets(seurat_obj = seurat, feature = "G2M.Score")
s_gg

g2m_gg

5 Session Information

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
## 
## Matrix products: default
## BLAS:   /apps/R/3.6.0/lib64/R/lib/libRblas.so
## LAPACK: /home/devel/rmassoni/anaconda3/lib/libmkl_rt.so
## 
## locale:
##  [1] LC_CTYPE=C                 LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.0    stringr_1.4.0    dplyr_1.0.4      purrr_0.3.4      readr_1.3.1      tidyr_1.1.0      tibble_3.0.1     ggplot2_3.3.0    tidyverse_1.3.0  Seurat_3.2.0     BiocStyle_2.14.4
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15            colorspace_1.4-1      deldir_0.1-25         ellipsis_0.3.1        ggridges_0.5.2        rprojroot_1.3-2       fs_1.4.1              rstudioapi_0.11       spatstat.data_1.4-3   farver_2.0.3          leiden_0.3.3          listenv_0.8.0         ggrepel_0.8.2         fansi_0.4.1           lubridate_1.7.8       xml2_1.3.2            codetools_0.2-16      splines_3.6.0         knitr_1.28            polyclip_1.10-0       jsonlite_1.7.2        broom_0.5.6           ica_1.0-2             cluster_2.1.0         dbplyr_1.4.4          png_0.1-7             uwot_0.1.8            shiny_1.4.0.2         sctransform_0.2.1     BiocManager_1.30.10   compiler_3.6.0        httr_1.4.2            backports_1.1.7       assertthat_0.2.1      Matrix_1.2-18         fastmap_1.0.1         lazyeval_0.2.2        cli_2.0.2             later_1.0.0           htmltools_0.5.1.1     tools_3.6.0           rsvd_1.0.3            igraph_1.2.5          gtable_0.3.0          glue_1.4.1            RANN_2.6.1            reshape2_1.4.4        rappdirs_0.3.1        Rcpp_1.0.6            spatstat_1.64-1       cellranger_1.1.0      vctrs_0.3.6           ape_5.3               nlme_3.1-148         
##  [55] lmtest_0.9-37         xfun_0.14             globals_0.12.5        rvest_0.3.5           mime_0.9              miniUI_0.1.1.1        lifecycle_0.2.0       irlba_2.3.3           goftest_1.2-2         future_1.17.0         MASS_7.3-51.6         zoo_1.8-8             scales_1.1.1          hms_0.5.3             promises_1.1.0        spatstat.utils_1.17-0 parallel_3.6.0        RColorBrewer_1.1-2    yaml_2.2.1            reticulate_1.16       pbapply_1.4-2         gridExtra_2.3         rpart_4.1-15          stringi_1.4.6         rlang_0.4.10          pkgconfig_2.0.3       evaluate_0.14         lattice_0.20-41       ROCR_1.0-11           tensor_1.5            labeling_0.3          patchwork_1.0.0       htmlwidgets_1.5.1     cowplot_1.0.0         tidyselect_1.1.0      here_0.1              RcppAnnoy_0.0.16      plyr_1.8.6            magrittr_1.5          bookdown_0.19         R6_2.4.1              generics_0.0.2        DBI_1.1.0             withr_2.4.1           pillar_1.4.4          haven_2.3.1           mgcv_1.8-31           fitdistrplus_1.1-1    survival_3.1-12       abind_1.4-5           future.apply_1.5.0    modelr_0.1.8          crayon_1.3.4          KernSmooth_2.23-17   
## [109] plotly_4.9.2.1        rmarkdown_2.2         viridis_0.5.1         readxl_1.3.1          grid_3.6.0            data.table_1.12.8     blob_1.2.1            reprex_0.3.0          digest_0.6.20         xtable_1.8-4          httpuv_1.5.3.1        munsell_0.5.0         viridisLite_0.3.0